In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

Explore the data and distributions¶

Load data and convert to appropriate types

In [3]:
# Load the raw site readings; convert_dtypes() switches each column to the
# best-fitting nullable pandas dtype.
site_data = pd.read_csv('data/SiteData.csv').convert_dtypes()
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly: wrapping it in display() would additionally render a stray "None".
site_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13221 entries, 0 to 13220
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TIMESTAMP  13221 non-null  string 
 1    READING   13221 non-null  string 
 2    VALUE     13221 non-null  Float64
dtypes: Float64(1), string(2)
memory usage: 322.9 KB
None

Change timestamp to datetime¶

In [4]:
# Parse the TIMESTAMP strings into datetime values, then write them back
# into the same column.
parsed_timestamps = pd.to_datetime(site_data['TIMESTAMP'])
site_data['TIMESTAMP'] = parsed_timestamps
# Print the frame summary again to confirm the new column dtype.
site_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13221 entries, 0 to 13220
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   TIMESTAMP  13221 non-null  datetime64[ns]
 1    READING   13221 non-null  string        
 2    VALUE     13221 non-null  Float64       
dtypes: Float64(1), datetime64[ns](1), string(1)
memory usage: 322.9 KB
In [5]:
# Preview the first five rows of the loaded readings.
site_data.head(n=5)
Out[5]:
TIMESTAMP READING VALUE
0 2020-08-24 00:00:00 PX_GENERATOR_POWER 0.0
1 2020-08-24 00:00:00 PX_RECTIFIER_LOAD 2.3867
2 2020-08-24 00:00:00 PX_GENERATOR_1_FUEL_LEVEL 400.43
3 2020-08-24 00:30:00 PX_GENERATOR_POWER 0.0
4 2020-08-24 00:30:00 PX_RECTIFIER_LOAD 2.4072

Some column names contain stray whitespace; strip it¶

In [6]:
# Remove surrounding whitespace from every column label so the columns can be
# addressed as 'READING' and 'VALUE'.
site_data.columns = [column_name.strip() for column_name in site_data.columns]
In [7]:
# List the distinct calendar months present in the timestamps.
timestamp_months = site_data['TIMESTAMP'].dt.month
timestamp_months.unique()
Out[7]:
array([ 8,  9, 10, 11], dtype=int32)
In [8]:
# List the distinct reading types recorded in the dataset.
site_data.loc[:, 'READING'].unique()
Out[8]:
<StringArray>
['PX_GENERATOR_POWER', 'PX_RECTIFIER_LOAD', 'PX_GENERATOR_1_FUEL_LEVEL']
Length: 3, dtype: string
In [9]:
# Box plot of the VALUE distribution for each reading type, on a log-scaled
# y axis. The frame and column names are passed (instead of bare Series) so the
# `labels` mapping also applies to the legend title, which otherwise shows the
# placeholder "color". The argument-less fig.update_layout() call was a no-op
# and has been dropped.
fig = px.box(
    site_data,
    x='READING',
    y='VALUE',
    color='READING',
    log_y=True,
    title='Statistics of each reading',
    labels={'VALUE': 'value', 'READING': 'reading'},
)
fig.show()
In [10]:
# Keep only the rows whose READING is the generator power.
generator_power = site_data.loc[site_data['READING'] == 'PX_GENERATOR_POWER']
In [11]:
# Histogram of the generator power values with a log-scaled count axis.
# A title is set so the figure stands on its own, and the column is named
# explicitly via `x=` rather than relying on wide-form input.
px.histogram(
    generator_power,
    x='VALUE',
    log_y=True,
    title='Distribution of PX_GENERATOR_POWER values',
)
In [12]:
# Keep only the rows whose READING is the rectifier load.
rectifier_load = site_data.loc[site_data['READING'] == 'PX_RECTIFIER_LOAD']
In [13]:
# Histogram of the rectifier load values with a log-scaled count axis.
# A title is set so the figure stands on its own, and the column is named
# explicitly via `x=` rather than relying on wide-form input.
px.histogram(
    rectifier_load,
    x='VALUE',
    log_y=True,
    title='Distribution of PX_RECTIFIER_LOAD values',
)
In [14]:
# Keep only the rows whose READING is the generator 1 fuel level.
fuel_level = site_data.loc[site_data['READING'] == 'PX_GENERATOR_1_FUEL_LEVEL']
In [15]:
# Histogram of the fuel level values (linear count axis, as before).
# A title is set so the figure stands on its own, and the column is named
# explicitly via `x=` rather than relying on wide-form input.
px.histogram(
    fuel_level,
    x='VALUE',
    title='Distribution of PX_GENERATOR_1_FUEL_LEVEL values',
)
In [16]:
# Show the rectifier-load rows whose VALUE is below 0.6.
low_load_mask = rectifier_load['VALUE'] < 0.6
rectifier_load.loc[low_load_mask]
Out[16]:
TIMESTAMP READING VALUE
6496 2020-10-08 07:00:00 PX_RECTIFIER_LOAD 0.4999
6499 2020-10-08 07:30:00 PX_RECTIFIER_LOAD 0.5661
In [17]:
# Reading types to plot, one subplot row each (top to bottom).
readings = ['PX_GENERATOR_POWER',
            'PX_RECTIFIER_LOAD',
            'PX_GENERATOR_1_FUEL_LEVEL']

# Create subplots: one row per reading, all sharing the same x (time) axis
fig = make_subplots(rows=len(readings), cols=1, shared_xaxes=True,
                    subplot_titles=readings)

# Add one time-series trace per reading, each in its own row
for row, reading in enumerate(readings, start=1):
    data = site_data[site_data['READING'] == reading]
    fig.add_trace(go.Scatter(x=data['TIMESTAMP'], y=data['VALUE'], mode='lines+markers', name=reading), row=row, col=1)

# Update layout
fig.update_layout(
    title='Time Series for Each READING',
    height=900,
    xaxis_rangeslider_visible=False
)

# Layout-level xaxis_title / yaxis_title only address the first subplot's axes,
# so the axis titles are set per subplot instead: the x title goes on the
# bottom row and every row gets a y title.
fig.update_xaxes(title_text='Timestamp', row=len(readings), col=1)
fig.update_yaxes(title_text='Value')

fig.show()
In [18]:
# Reading types to plot, one histogram row each (top to bottom).
histogram_readings = ('PX_GENERATOR_POWER',
                      'PX_RECTIFIER_LOAD',
                      'PX_GENERATOR_1_FUEL_LEVEL')

# Three stacked subplots; each histogram keeps its own x axis (shared_xaxes=False).
fig = make_subplots(rows=3, cols=1, shared_xaxes=False,
                    subplot_titles=list(histogram_readings))

# One histogram trace per reading, placed in the matching row.
for row_number, reading_name in enumerate(histogram_readings, start=1):
    data = site_data[site_data['READING'] == reading_name]
    fig.add_trace(go.Histogram(x=data['VALUE'], name=reading_name, opacity=0.75), row=row_number, col=1)

# Figure-wide settings: title, size and bar spacing.
fig.update_layout(
    title='Histogram for Each READING',
    height=900,
    bargap=0.2,
    bargroupgap=0.1,
)

# Only the first two rows use a log-scaled count axis; the third stays linear.
for log_row in (1, 2):
    fig.update_yaxes(type='log', row=log_row, col=1)

# Same axis titles on every subplot.
fig.update_xaxes(title_text="Value")
fig.update_yaxes(title_text="Count")

fig.show()
In [ ]: